(***********************************************************************)
(*                                                                     *)
(*                           Objective Caml                            *)
(*                                                                     *)
(*            Xavier Leroy, projet Cristal,                            *)
(*            Luc Maranget, projet Moscova,                            *)
(*                  INRIA Rocquencourt                                 *)
(*                                                                     *)
(*  Copyright 1996 Institut National de Recherche en Informatique et   *)
(*  en Automatique.  All rights reserved.  This file is distributed    *)
(*  under the terms of the Q Public License version 1.0.               *)
(*                                                                     *)
(***********************************************************************)

(* $Id$ *)

(* Compiling a lexer definition *)

open Syntax
open Printf

exception Memory_overflow

(* Deep abstract syntax for regular expressions *)

type tag_info = {id : string ; start : bool ; action : int}

type regexp =
    Empty
  | Chars of int * bool
  | Action of int
  | Tag of tag_info
  | Seq of regexp * regexp
  | Alt of regexp * regexp
  | Star of regexp

type tag_base = Start | End | Mem of int
type tag_addr = Sum of (tag_base * int)        
type ident_info =
  | Ident_string of bool * tag_addr * tag_addr
  | Ident_char of bool * tag_addr
type t_env = (string * ident_info) list

type ('args,'action) lexer_entry =
  { lex_name: string;
    lex_regexp: regexp;
    lex_mem_tags: int ;
    lex_actions: (int *  t_env * 'action) list }


type automata =
    Perform of int * tag_action list
  | Shift of automata_trans * (automata_move * memory_action list) array

and automata_trans =
    No_remember
  | Remember of int * tag_action list

and automata_move =
    Backtrack
  | Goto of int

and memory_action =
  | Copy of int * int
  | Set of int

and tag_action = SetTag of int * int | EraseTag of int

(* Representation of entry points *)

type ('args,'action) automata_entry =
  { auto_name: string;
    auto_args: 'args ;
    auto_mem_size : int ;
    auto_initial_state: int * memory_action list;
    auto_actions: (int * t_env * 'action) list }


(* A lot of sets and map structures *)

module Ints = Set.Make(struct type t = int let compare = compare end)

module Tags = Set.Make(struct type t = tag_info let compare = compare end)

module TagMap =
  Map.Make (struct type t = tag_info let compare = compare end)

module StringSet =
  Set.Make (struct type t = string let compare = Pervasives.compare end)
module StringMap =
  Map.Make (struct type t = string let compare = Pervasives.compare end)

(*********************)
(* Variable cleaning *)
(*********************)

(* Silently eliminate nested variables *)

let rec do_remove_nested to_remove = function
  | Bind (e,x) ->
      if StringSet.mem x to_remove then
        do_remove_nested to_remove e
      else
        Bind (do_remove_nested (StringSet.add x to_remove) e, x)
  | Epsilon|Eof|Characters _ as e -> e
  | Sequence (e1, e2) ->
      Sequence
        (do_remove_nested to_remove  e1, do_remove_nested to_remove  e2)
  | Alternative (e1, e2) ->
      Alternative
        (do_remove_nested to_remove  e1, do_remove_nested to_remove  e2)
  | Repetition e ->
      Repetition (do_remove_nested to_remove  e)

let remove_nested_as e = do_remove_nested StringSet.empty e

(*********************)
(* Variable analysis *)
(*********************)

(*
  Optional variables.
   A variable is optional when matching of regexp does not
   implies it binds.
     The typical case is:
       ("" | 'a' as x) -> optional
       ("" as x | 'a' as x) -> non-optional
*)

let stringset_delta s1 s2 =
  StringSet.union
    (StringSet.diff s1 s2)
    (StringSet.diff s2 s1)

let rec find_all_vars = function
  | Characters _|Epsilon|Eof ->
      StringSet.empty
  | Bind (e,x) ->
      StringSet.add x (find_all_vars e)
  | Sequence (e1,e2)|Alternative (e1,e2) ->
      StringSet.union (find_all_vars e1) (find_all_vars e2)
  | Repetition e -> find_all_vars e


let rec do_find_opt = function
  | Characters _|Epsilon|Eof -> StringSet.empty, StringSet.empty
  | Bind (e,x) ->
      let opt,all = do_find_opt e in
      opt, StringSet.add x all
  | Sequence (e1,e2) ->
      let opt1,all1 = do_find_opt e1
      and opt2,all2 = do_find_opt e2 in
      StringSet.union opt1 opt2, StringSet.union all1 all2
  | Alternative (e1,e2) ->
      let opt1,all1 = do_find_opt e1
      and opt2,all2 = do_find_opt e2 in
      StringSet.union
        (StringSet.union opt1 opt2)
        (stringset_delta all1 all2),
      StringSet.union all1 all2
  | Repetition e  ->
      let r = find_all_vars e in
      r,r

let find_optional e =
  let r,_ = do_find_opt e in r

(*
   Double variables
   A variable is double when it can be bound more than once
   in a single matching
     The typical case is:
       (e1 as x) (e2 as x)

*)

let rec do_find_double = function
  | Characters _|Epsilon|Eof -> StringSet.empty, StringSet.empty
  | Bind (e,x) ->
      let dbl,all = do_find_double e in
      (if StringSet.mem x all then
        StringSet.add x dbl
      else
        dbl),
      StringSet.add x all
  | Sequence (e1,e2) ->
      let dbl1, all1 = do_find_double e1
      and dbl2, all2 = do_find_double e2 in
      StringSet.union
        (StringSet.inter all1 all2)
        (StringSet.union dbl1 dbl2),
      StringSet.union all1 all2
  | Alternative (e1,e2) ->
      let dbl1, all1 = do_find_double e1
      and dbl2, all2 = do_find_double e2 in
      StringSet.union dbl1 dbl2,
      StringSet.union all1 all2
  | Repetition e ->
      let r = find_all_vars e in
      r,r

let find_double e = do_find_double e

(*
   Type of variables:
    A variable is bound to a char when all its occurences
    bind a pattern of length 1.
     The typical case is:
       (_ as x) -> char
*)

let add_some x = function
  | Some i -> Some (x+i)
  | None   -> None

let add_some_some x y = match x,y with
| Some i, Some j -> Some (i+j)
| _,_            -> None

let rec do_find_chars sz = function
  | Epsilon|Eof    -> StringSet.empty, StringSet.empty, sz
  | Characters _ -> StringSet.empty, StringSet.empty, add_some 1 sz
  | Bind (e,x)   ->
      let c,s,e_sz = do_find_chars (Some 0) e in
      begin match e_sz  with
      | Some 1 ->
          StringSet.add x c,s,add_some 1 sz
      | _ ->
          c, StringSet.add x s, add_some_some sz e_sz
      end
  | Sequence (e1,e2) ->
      let c1,s1,sz1 = do_find_chars sz e1 in
      let c2,s2,sz2 = do_find_chars sz1 e2 in
      StringSet.union c1 c2,
      StringSet.union s1 s2,
      sz2
  | Alternative (e1,e2) ->
      let c1,s1,sz1 = do_find_chars sz e1
      and c2,s2,sz2 = do_find_chars sz e2 in
      StringSet.union c1 c2,
      StringSet.union s1 s2,
      (if sz1 = sz2 then sz1 else None)
  | Repetition e -> do_find_chars None e



let find_chars e =
  let c,s,_ = do_find_chars (Some 0) e in
  StringSet.diff c s

(*******************************)
(* From shallow to deep syntax *)
(*******************************)

let chars = ref ([] : Cset.t list)
let chars_count = ref 0


let rec encode_regexp char_vars act = function
    Epsilon -> Empty
  | Characters cl ->
      let n = !chars_count in
      chars := cl :: !chars;
      incr chars_count;
      Chars(n,false)
  | Eof ->
      let n = !chars_count in
      chars := Cset.eof :: !chars;
      incr chars_count;
      Chars(n,true)
  | Sequence(r1,r2) ->
      let r1 = encode_regexp char_vars act r1 in
      let r2 = encode_regexp char_vars act r2 in
      Seq (r1, r2)
  | Alternative(r1,r2) ->
      let r1 = encode_regexp char_vars act r1 in
      let r2 = encode_regexp char_vars act r2 in
      Alt(r1, r2)
  | Repetition r ->
      let r = encode_regexp char_vars act r in
      Star r
  | Bind (r,x) ->
      let r = encode_regexp char_vars act r in
      if StringSet.mem x char_vars then
        Seq (Tag {id=x ; start=true ; action=act},r)
      else
        Seq (Tag {id=x ; start=true ; action=act},
          Seq (r, Tag {id=x ; start=false ; action=act}))


(* Optimisation,
    Static optimization :
      Replace tags by offsets relative to the beginning
      or end of matched string. 
    Dynamic optimization:
      Replace some non-optional, non-double tags by offsets w.r.t
      a previous similar tag.
*)

let incr_pos = function
  | None   -> None
  | Some i -> Some (i+1)

let decr_pos = function
  | None -> None
  | Some i -> Some (i-1)


let opt = true

let mk_seq r1 r2 = match r1,r2  with
| Empty,_ -> r2
| _,Empty -> r1
| _,_     -> Seq (r1,r2)

let add_pos p i = match p with
| Some (Sum (a,n)) -> Some (Sum (a,n+i))
| None -> None

let opt_regexp all_vars char_vars optional_vars double_vars r =

(* From removed tags to their addresses *)
  let env = Hashtbl.create 17 in

(* First static optimizations, from start position *)
  let rec size_forward pos = function
    | Empty|Chars (_,true)|Tag _ -> Some pos
    | Chars (_,false) -> Some (pos+1)
    | Seq (r1,r2) ->
        begin match size_forward pos r1 with
        | None -> None
        | Some pos  -> size_forward pos r2
        end
    | Alt (r1,r2) ->
        let pos1 = size_forward pos r1
        and pos2 = size_forward pos r2 in
        if pos1=pos2 then pos1 else None
    | Star _ -> None
    | Action _ -> assert false in

  let rec simple_forward pos r = match r with
    | Tag n ->
        if StringSet.mem n.id double_vars then
          r,Some pos
        else begin
          Hashtbl.add env (n.id,n.start) (Sum (Start, pos)) ;
          Empty,Some pos
        end
    | Empty -> r, Some pos
    | Chars (_,is_eof) ->
        r,Some (if is_eof then  pos else pos+1)
    | Seq (r1,r2) ->
        let r1,pos = simple_forward pos r1 in
        begin match pos with
        | None -> mk_seq r1 r2,None
        | Some pos ->
            let r2,pos = simple_forward pos r2 in
            mk_seq r1 r2,pos
        end
    | Alt (r1,r2) ->
        let pos1 = size_forward pos r1
        and pos2 = size_forward pos r2 in
        r,(if pos1=pos2 then pos1 else None)
    | Star _ -> r,None
    | Action _ -> assert false in

(* Then static optimizations, from end position *)
  let rec size_backward pos = function
    | Empty|Chars (_,true)|Tag _ -> Some pos
    | Chars (_,false) -> Some (pos-1)
    | Seq (r1,r2) ->
        begin match size_backward pos r2 with
        | None -> None
        | Some pos  -> size_backward pos r1
        end
    | Alt (r1,r2) ->
        let pos1 = size_backward pos r1
        and pos2 = size_backward pos r2 in
        if pos1=pos2 then pos1 else None
    | Star _ -> None
    | Action _ -> assert false in


  let rec simple_backward pos r = match r with
    | Tag n ->
        if StringSet.mem n.id double_vars then
          r,Some pos
        else begin
          Hashtbl.add env (n.id,n.start) (Sum (End, pos)) ;
          Empty,Some pos
        end
    | Empty -> r,Some pos
    | Chars (_,is_eof) ->
        r,Some (if is_eof then pos else pos-1)
    | Seq (r1,r2) ->
        let r2,pos = simple_backward pos r2 in
        begin match pos with
        | None -> mk_seq r1 r2,None
        | Some pos ->
            let r1,pos = simple_backward pos r1 in
            mk_seq r1 r2,pos
        end
    | Alt (r1,r2) ->
        let pos1 = size_backward pos r1
        and pos2 = size_backward pos r2 in
        r,(if pos1=pos2 then pos1 else None)
    | Star _ -> r,None
    | Action _ -> assert false in

  let r =
    if opt then
      let r,_ = simple_forward 0 r in
      let r,_ = simple_backward 0 r in
      r
    else
      r in

  let loc_count = ref 0 in
  let get_tag_addr t =
    try
     Hashtbl.find env t
    with
    | Not_found ->
        let n = !loc_count in
        incr loc_count ;
        Hashtbl.add env t (Sum (Mem n,0)) ;
        Sum (Mem n,0) in

  let rec alloc_exp pos r = match r with
    | Tag n ->
        if StringSet.mem n.id double_vars then
          r,pos
        else begin match pos with
        | Some a ->
            Hashtbl.add env (n.id,n.start) a ;
            Empty,pos
        | None ->
            let a = get_tag_addr (n.id,n.start) in
            r,Some a
        end
          
    | Empty -> r,pos
    | Chars (_,is_eof) -> r,(if is_eof then pos else add_pos pos 1)
    | Seq (r1,r2) ->
        let r1,pos = alloc_exp pos r1 in
        let r2,pos = alloc_exp pos r2 in
        mk_seq r1 r2,pos
    | Alt (_,_) ->
        let off = size_forward 0 r in
        begin match off with
        | Some i -> r,add_pos pos i
        | None -> r,None
        end
    | Star _ -> r,None
    | Action _ -> assert false in

  let r,_ = alloc_exp None r in
  let m =      
    StringSet.fold
      (fun x r ->
        let v =
          if StringSet.mem x char_vars then
            Ident_char
              (StringSet.mem x optional_vars, get_tag_addr (x,true))
          else
            Ident_string
              (StringSet.mem x optional_vars,
               get_tag_addr (x,true),
               get_tag_addr (x,false)) in
        (x,v)::r)
      all_vars [] in
  m,r, !loc_count

        
  
let encode_casedef casedef =
  let r =
    List.fold_left
      (fun (reg,actions,count,ntags) (expr, act) ->
        let expr = remove_nested_as expr in
        let char_vars = find_chars expr in
        let r = encode_regexp char_vars count expr
        and opt_vars = find_optional expr
        and double_vars,all_vars = find_double expr in
        let m,r,loc_ntags =
          opt_regexp all_vars char_vars opt_vars double_vars r in
        Alt(reg, Seq(r, Action count)),
        (count, m ,act) :: actions,
        (succ count),
        max loc_ntags ntags)
      (Empty, [], 0, 0)
      casedef in
  r

let encode_lexdef def =
  chars := [];
  chars_count := 0;
  let entry_list =
    List.map
      (fun {name=entry_name ; args=args ; shortest=shortest ; clauses= casedef} ->
        let (re,actions,_,ntags) = encode_casedef casedef in
        { lex_name = entry_name;
          lex_regexp = re;
          lex_mem_tags = ntags ;
          lex_actions = List.rev actions },args,shortest)
      def in
  let chr = Array.of_list (List.rev !chars) in
  chars := [];
  (chr, entry_list)

(* To generate directly a NFA from a regular expression.
     Confer Aho-Sethi-Ullman, dragon book, chap. 3 
   Extension to tagged automata.
     Confer
       Ville Larikari
      ``NFAs with Tagged Transitions, their Conversion to Deterministic
        Automata and Application to Regular Expressions''.
       Symposium on String Processing and Information Retrieval (SPIRE 2000),
     http://kouli.iki.fi/~vlaurika/spire2000-tnfa.ps
(See also)
     http://kouli.iki.fi/~vlaurika/regex-submatch.ps.gz
*)

type t_transition =
    OnChars of int 
  | ToAction of int

type transition = t_transition * Tags.t

let compare_trans (t1,tags1) (t2,tags2) =
  match Pervasives.compare  t1 t2 with
  | 0 -> Tags.compare tags1 tags2
  | r -> r


module TransSet =
  Set.Make(struct type t = transition let compare = compare end)

let rec nullable = function
  | Empty|Tag _ -> true
  | Chars (_,_)|Action _ -> false
  | Seq(r1,r2) -> nullable r1 && nullable r2
  | Alt(r1,r2) -> nullable r1 || nullable r2
  | Star r     -> true

let rec emptymatch = function
  | Empty | Chars (_,_) | Action _ -> Tags.empty
  | Tag t       -> Tags.add t Tags.empty
  | Seq (r1,r2) -> Tags.union (emptymatch r1) (emptymatch r2)
  | Alt(r1,r2)  ->
      if nullable r1 then
        emptymatch r1
      else
        emptymatch r2
  | Star r ->
      if nullable r then
        emptymatch r
      else
        Tags.empty

let addtags transs tags =
  TransSet.fold
    (fun (t,tags_t) r -> TransSet.add (t, Tags.union tags tags_t) r)
    transs TransSet.empty


let rec firstpos = function
    Empty|Tag _ -> TransSet.empty
  | Chars (pos,_) -> TransSet.add (OnChars pos,Tags.empty) TransSet.empty
  | Action act -> TransSet.add (ToAction act,Tags.empty) TransSet.empty
  | Seq(r1,r2) ->
      if nullable r1 then
        TransSet.union (firstpos r1) (addtags (firstpos r2) (emptymatch r1))
      else
        firstpos r1
  | Alt(r1,r2) -> TransSet.union (firstpos r1) (firstpos r2)
  | Star r     -> firstpos r


(* Berry-sethi followpos *)
let followpos size entry_list =
  let v = Array.create size TransSet.empty in
  let rec fill s = function
    | Empty|Action _|Tag _ -> ()
    | Chars (n,_) -> v.(n) <- s
    | Alt (r1,r2) ->
        fill s r1 ; fill s r2
    | Seq (r1,r2) ->
        fill
          (if nullable r2 then
            TransSet.union (firstpos r2) (addtags s (emptymatch r2))
          else
            (firstpos r2))
          r1 ;
        fill s r2
    | Star r ->
        fill (TransSet.union (firstpos r) s) r in
  List.iter (fun (entry,_,_) -> fill TransSet.empty entry.lex_regexp) entry_list ;
  v
  
(************************)
(* The algorithm itself *)
(************************)

let no_action = max_int

module StateSet =
  Set.Make (struct type t = t_transition let compare = Pervasives.compare end)


module MemMap =
  Map.Make (struct type t = int let compare = Pervasives.compare end)

type 'a dfa_state =
  {final : int * ('a * int TagMap.t) ;
   others : ('a * int TagMap.t) MemMap.t}

(*
let dtag oc t =
  fprintf oc "%s<%s>" t.id (if t.start then "s" else "e")

let dmem_map dp ds m =
  MemMap.iter
    (fun k x ->
      eprintf "%d -> " k ; dp x ; ds ())
    m

and dtag_map dp ds m =
  TagMap.iter
    (fun t x ->
      dtag stderr t ; eprintf " -> " ; dp x ; ds ())
    m

let dstate {final=(act,(_,m)) ; others=o} =
  if act <> no_action then begin
    eprintf "final=%d " act ;
    dtag_map (fun x -> eprintf "%d" x) (fun () -> prerr_string " ,") m ;
    prerr_endline ""
  end ;
  dmem_map
    (fun (_,m) ->
      dtag_map (fun x -> eprintf "%d" x) (fun () -> prerr_string " ,") m)
    (fun () -> prerr_endline "")
    o
*)
  
let dfa_state_empty =
  {final=(no_action, (max_int,TagMap.empty)) ;
   others=MemMap.empty}

and dfa_state_is_empty {final=(act,_) ; others=o} =
  act = no_action &&
  o = MemMap.empty

  
(* A key is an abstraction on a dfa state,
   two states with the same key can be made the same by
   copying some memory cells into others *)


module StateSetSet =
  Set.Make (struct type t = StateSet.t let compare = StateSet.compare end)

type t_equiv = {tag:tag_info ; equiv:StateSetSet.t}

module MemKey =
  Set.Make
   (struct
     type t = t_equiv

     let compare e1 e2 = match Pervasives.compare e1.tag e2.tag with
     | 0 -> StateSetSet.compare e1.equiv e2.equiv
     | r -> r
   end)

type dfa_key = {kstate : StateSet.t ; kmem : MemKey.t}

(* Map a state to its key *)
let env_to_class m =
  let env1 = 
    MemMap.fold
      (fun _ (tag,s) r ->
        try
          let ss = TagMap.find tag r in
          let r = TagMap.remove tag r in
          TagMap.add tag (StateSetSet.add s ss) r
        with 
        | Not_found ->
            TagMap.add tag (StateSetSet.add s StateSetSet.empty) r)
      m TagMap.empty in
  TagMap.fold
    (fun tag ss r -> MemKey.add {tag=tag ; equiv=ss} r)
    env1 MemKey.empty


(* trans is nfa_state, m is associated memory map *)
let inverse_mem_map trans m r =
  TagMap.fold
    (fun tag addr r ->
      try
        let otag,s = MemMap.find addr r in
        assert (tag = otag) ;
        let r = MemMap.remove addr r in
        MemMap.add addr (tag,StateSet.add trans s) r
      with
      | Not_found ->
          MemMap.add addr (tag,StateSet.add trans StateSet.empty) r)
    m r

let inverse_mem_map_other n (_,m) r = inverse_mem_map (OnChars n) m r

let get_key {final=(act,(_,m_act)) ; others=o} =
  let env =
    MemMap.fold inverse_mem_map_other
      o
      (if act = no_action then MemMap.empty
      else inverse_mem_map (ToAction act) m_act MemMap.empty) in
  let state_key =
    MemMap.fold (fun n _ r -> StateSet.add (OnChars n) r) o
      (if act=no_action then StateSet.empty
      else StateSet.add (ToAction act) StateSet.empty) in
  let mem_key = env_to_class  env in
  {kstate = state_key ; kmem = mem_key}


let key_compare k1 k2 = match StateSet.compare k1.kstate k2.kstate with
| 0 -> MemKey.compare k1.kmem k2.kmem
| r -> r

(* Association dfa_state -> state_num *)
        
module StateMap =
  Map.Make(struct type t = dfa_key let compare = key_compare end)

let state_map = ref (StateMap.empty : int StateMap.t)
let todo = Stack.create() 
let next_state_num = ref 0
let next_mem_cell = ref 0
let temp_pending = ref false
let tag_cells = Hashtbl.create 17 
let state_table = Table.create dfa_state_empty


let reset_state_mem () =
  state_map := StateMap.empty;
  Stack.clear todo;
  next_state_num := 0 ;  
  let _ = Table.trim state_table in
  ()

(* Allocation of memory cells *)
let reset_cell_mem ntags =
  next_mem_cell := ntags ;
  Hashtbl.clear tag_cells ;
  temp_pending := false

let do_alloc_temp () =
  temp_pending := true ;
  let n = !next_mem_cell in
  n

let do_alloc_cell used t =
  let available =
    try Hashtbl.find tag_cells t with Not_found -> Ints.empty in
  try
    Ints.choose (Ints.diff available used)
  with
  | Not_found ->
      temp_pending := false ;
      let n = !next_mem_cell in
      if n >= 255 then raise Memory_overflow ;
      Hashtbl.replace tag_cells t (Ints.add n available) ;
      incr next_mem_cell ;
      n

let is_old_addr a = a >= 0
and is_new_addr a = a < 0

let old_in_map m r =
  TagMap.fold
    (fun _ addr r ->
      if is_old_addr addr then
        Ints.add addr r
      else
        r)
    m r

let alloc_map used m mvs =
  TagMap.fold
    (fun tag a (r,mvs) ->
      let a,mvs =
        if is_new_addr a then
          let a = do_alloc_cell used tag in
          a,Ints.add a mvs
        else a,mvs in
      TagMap.add tag a r,mvs)
    m (TagMap.empty,mvs)

let create_new_state {final=(act,(_,m_act)) ; others=o} =
  let used =
    MemMap.fold (fun _ (_,m) r -> old_in_map m r)
      o (old_in_map m_act Ints.empty) in

  let new_m_act,mvs  = alloc_map used m_act Ints.empty in
  let new_o,mvs =
    MemMap.fold (fun k (x,m) (r,mvs) ->
      let m,mvs = alloc_map used m mvs in
      MemMap.add k (x,m) r,mvs)
      o (MemMap.empty,mvs) in
  {final=(act,(0,new_m_act)) ; others=new_o},
  Ints.fold (fun x r -> Set x::r) mvs []

type new_addr_gen = {mutable count : int ; mutable env : int TagMap.t}

let create_new_addr_gen () = {count = -1 ; env = TagMap.empty}

let alloc_new_addr tag r =
  try
    TagMap.find tag r.env
  with
  | Not_found ->
      let a = r.count in
      r.count <- a-1 ;
      r.env <- TagMap.add tag a r.env ;
      a


let create_mem_map tags gen =
  Tags.fold
    (fun tag r -> TagMap.add tag (alloc_new_addr tag gen) r)
    tags TagMap.empty

let create_init_state pos =
  let gen = create_new_addr_gen () in
  let st =
    TransSet.fold
      (fun (t,tags) st ->
        match t with
        | ToAction n ->
            let on,otags = st.final in
            if n < on then
              {st with final = (n, (0,create_mem_map tags gen))}
            else
              st
        | OnChars n ->
            try
              let _ = MemMap.find n st.others in assert false
            with
            | Not_found ->
                {st with others =
                  MemMap.add n (0,create_mem_map tags gen) st.others})
      pos dfa_state_empty in
  st


let get_map t st = match t with
| ToAction _ -> let _,(_,m) = st.final in m
| OnChars n  ->
    let (_,m) = MemMap.find n st.others in
    m

let dest = function | Copy (d,_) | Set d  -> d
and orig = function | Copy (_,o) -> o | Set _ -> -1 

let pmv oc mv = fprintf oc "%d <- %d" (dest mv) (orig mv)
let pmvs oc mvs =
  List.iter (fun mv -> fprintf oc "%a " pmv  mv) mvs ;
  output_char oc '\n' ; flush oc

    
(* Topological sort << a la louche >> *)
let sort_mvs mvs =
  let rec do_rec r mvs = match mvs with
  | [] -> r
  | _  ->
      let dests =
        List.fold_left
          (fun r mv -> Ints.add (dest mv) r)
          Ints.empty mvs in
      let rem,here =
        List.partition
          (fun mv -> Ints.mem (orig mv) dests)
          mvs in
      match here with
      | [] ->
          begin match rem with
          | Copy (d,_)::_ ->
              let d' = do_alloc_temp () in
              Copy (d',d)::
              do_rec r
                (List.map
                   (fun mv ->
                     if orig mv = d then
                       Copy (dest mv,d')
                     else
                       mv)
                   rem)
          | _ -> assert false
          end
      | _  -> do_rec (here@r) rem  in
  do_rec [] mvs
      
let move_to mem_key src tgt =
  let mvs =
    MemKey.fold
      (fun {tag=tag ; equiv=m} r ->
        StateSetSet.fold
          (fun s r ->
            try
              let t = StateSet.choose s  in
              let src = TagMap.find tag (get_map t src)
              and tgt = TagMap.find tag (get_map t tgt) in
              if src <> tgt then begin
                if is_new_addr src then
                  Set tgt::r
                else
                  Copy (tgt, src)::r
              end else
                r
            with
            | Not_found -> assert false)
          m r)
      mem_key [] in
(* Moves are topologically sorted *)
  sort_mvs mvs


let get_state st = 
  let key = get_key st in
  try
    let num = StateMap.find key !state_map in
    num,move_to key.kmem st (Table.get state_table num)
  with Not_found ->
    let num = !next_state_num in
    incr next_state_num;
    let st,mvs = create_new_state st in
    Table.emit state_table st ;
    state_map := StateMap.add key num !state_map;
    Stack.push (st, num) todo;
    num,mvs

let map_on_all_states f old_res =
  let res = ref old_res in
  begin try
    while true do
      let (st, i) = Stack.pop todo in
      let r = f st in
      res := (r, i) :: !res
    done
  with Stack.Empty -> ()
  end;
  !res

let goto_state st =
  if
    dfa_state_is_empty st
  then
    Backtrack,[]
  else
    let n,moves = get_state st in
    Goto n,moves

(****************************)
(* compute reachable states *)
(****************************)

let add_tags_to_map gen tags m =
  Tags.fold
    (fun tag m ->
      let m = TagMap.remove tag m in
      TagMap.add tag (alloc_new_addr tag gen) m)
    tags m

let apply_transition gen r pri m = function
  | ToAction n,tags ->
      let on,(opri,_) = r.final in
      if n < on || (on=n && pri < opri) then 
        let m = add_tags_to_map gen tags m in
        {r with final=n,(pri,m)}
      else r
  |  OnChars n,tags ->
      try
        let (opri,_) = MemMap.find n r.others in
        if pri < opri then
          let m = add_tags_to_map gen tags m in          
          {r with others=MemMap.add n (pri,m) (MemMap.remove n r.others)}
        else
          r
      with
      | Not_found ->
          let m = add_tags_to_map gen tags m in
          {r with others=MemMap.add n (pri,m) r.others}

(* add transitions ts to new state r
   transitions in ts start from state pri and memory map m
*)
let apply_transitions gen r pri m ts =
  TransSet.fold
    (fun t r -> apply_transition gen r pri m t)
    ts r


(* For a given nfa_state pos, refine char partition *)  
let rec split_env gen follow pos m s = function
  | [] -> (* Can occur ! because of non-matching regexp ([^'\000'-'\255']) *)
      []
  | (s1,st1) as p::rem ->
      let here = Cset.inter s s1 in
      if Cset.is_empty here then
        p::split_env gen follow pos m s rem
      else
        let rest = Cset.diff s here in
        let rem =
          if Cset.is_empty rest then
            rem
          else
            split_env gen follow pos m rest rem
        and new_st = apply_transitions gen st1 pos m follow in        
        let stay = Cset.diff s1 here in
        if Cset.is_empty stay then
          (here, new_st)::rem
        else
          (stay, st1)::(here, new_st)::rem
        

(* For all nfa_state pos in a dfa state st *)
let comp_shift gen chars follow st =
  MemMap.fold
    (fun pos (_,m) env -> split_env gen follow.(pos) pos m chars.(pos) env)
    st [Cset.all_chars_eof,dfa_state_empty]
        

let reachs chars follow st =
  let gen = create_new_addr_gen () in
(* build a association list (char set -> new state) *)
  let env = comp_shift gen chars follow st in
(* change it into (char set -> new state_num) *)
  let env =
    List.map
      (fun (s,dfa_state) -> s,goto_state dfa_state) env in
(* finally build the char indexed array -> new state num *)
  let shift = Cset.env_to_array env in
  shift  


let get_tag_mem n env t =
  try
    TagMap.find t env.(n)
  with
  | Not_found -> assert false

let do_tag_actions n env  m =

  let used,r =
    TagMap.fold (fun t m (used,r) ->
      let a = get_tag_mem n env t in
      Ints.add a used,SetTag (a,m)::r) m (Ints.empty,[]) in
  let _,r =
    TagMap.fold
      (fun tag m (used,r) ->
        if not (Ints.mem m used) && tag.start then
          Ints.add m used, EraseTag m::r
        else
          used,r)
      env.(n) (used,r) in
  r
  
    
let translate_state shortest_match tags chars follow st =
  let (n,(_,m)) = st.final in
  if MemMap.empty = st.others then
    Perform (n,do_tag_actions n tags m)
  else if shortest_match then begin
    if n=no_action then
      Shift (No_remember,reachs chars follow st.others)
    else
      Perform(n, do_tag_actions n tags m)
  end else begin
    Shift (
    (if n = no_action then
      No_remember
    else
      Remember (n,do_tag_actions n tags m)),
    reachs chars follow st.others)
  end

(*
let dtags chan tags =
  Tags.iter
    (fun t -> fprintf chan " %a" dtag t)
    tags
  
let dtransset s =
  TransSet.iter
    (fun trans -> match trans with
    | OnChars i,tags ->
        eprintf " (-> %d,%a)" i dtags tags
    | ToAction i,tags ->
        eprintf " ([%d],%a)" i dtags tags)
    s

let dfollow t =
  eprintf "follow=[" ;
  for i = 0 to Array.length t-1 do
    eprintf "%d:" i ;
    dtransset t.(i)
  done ;
  prerr_endline "]"
*)

let make_tag_entry id start act a r = match a with
  | Sum (Mem m,0) ->
      TagMap.add {id=id ; start=start ; action=act} m r
  | _ -> r

let extract_tags l =
  let envs = Array.create (List.length l) TagMap.empty in
  List.iter
    (fun (act,m,_) ->
      envs.(act) <-
         List.fold_right
           (fun (x,v) r -> match v with
           | Ident_char (_,t) -> make_tag_entry x true act t r
           | Ident_string (_,t1,t2) ->
               make_tag_entry x true act t1
               (make_tag_entry x false act t2 r))
           m TagMap.empty)
    l ;
  envs


let make_dfa lexdef =
  let (chars, entry_list) = encode_lexdef lexdef in
  let follow = followpos (Array.length chars) entry_list in
(*
  dfollow follow ;
*)
  reset_state_mem () ;
  let r_states = ref [] in
  let initial_states =
    List.map
      (fun (le,args,shortest) ->
        let tags = extract_tags le.lex_actions in
        reset_cell_mem le.lex_mem_tags ;
        let pos_set = firstpos le.lex_regexp in
(*
        prerr_string "trans={" ; dtransset pos_set ; prerr_endline "}" ;
*)
        let init_state = create_init_state pos_set in
        let init_num = get_state init_state in
        r_states :=
           map_on_all_states
             (translate_state shortest tags chars follow) !r_states ;
        { auto_name = le.lex_name;
          auto_args = args ; 
          auto_mem_size =
            (if !temp_pending then !next_mem_cell+1 else !next_mem_cell) ;
          auto_initial_state = init_num ;
          auto_actions = le.lex_actions })
      entry_list in
  let states = !r_states in
(*
  prerr_endline "** states **" ;
  for i = 0 to !next_state_num-1 do
    eprintf "+++ %d +++\n" i ;
    dstate (Table.get state_table i) ;
    prerr_endline ""
  done ;
  eprintf "%d states\n" !next_state_num ;
*)
  let actions = Array.create !next_state_num (Perform (0,[])) in
  List.iter (fun (act, i) -> actions.(i) <- act) states;
  reset_state_mem () ;
  reset_cell_mem  0 ;
  (initial_states, actions)
